Condor Instruments - Complete sleep analysis demonstration

Julius A. P. P. de Paula

jp@condorinst.com.br

1) Package installation and upgrade

In [1]:
!pip install wget # installs library for file download
!pip install xgboost --upgrade # upgrades package used in offwrist algorithm
Requirement already satisfied: wget in /home/julius/.local/lib/python3.8/site-packages (3.2)
Requirement already up-to-date: xgboost in /home/julius/.local/lib/python3.8/site-packages (1.6.1)
Requirement already satisfied, skipping upgrade: scipy in /home/julius/.local/lib/python3.8/site-packages (from xgboost) (1.8.1)
Requirement already satisfied, skipping upgrade: numpy in /home/julius/.local/lib/python3.8/site-packages (from xgboost) (1.23.1)

2) Importing packages

In [2]:
# these packages are required for obtaining the path to the current file
import sys
import inspect
import os
root = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) # path to "this" directory
In [3]:
import numpy as np # mathmatics library
import pandas as pd # data science library
In [5]:
# dependency download
import wget
URL = "https://github.com/Condor-Instruments/actigraphy-tutorials-sample/blob/master/demo-dependencies.zip?raw=true"
response = wget.download(URL, "demo-dependencies.zip")

# file unzip
import zipfile
with zipfile.ZipFile("demo-dependencies.zip", 'r') as zip_ref:
  zip_ref.extractall(root)

from crespo_wrapper import crespo_wrapper # algorithm for main sleep period detection

from logread import LogRead as lr # class for log file reading

from offwrist_wrapper import offwrist_wrapper # algorithm for offwrist detection

from colekripke import ColeKripke as ck # algorithm for WASO detection

from nights_df import nights_df # helper algorithm for daily processing

from simple_actogram import actigraphy_single_plot_actogram
100% [......................................................] 3998118 / 3998118

3) Reading files

For this demonstration we've made 3 files available: input0.txt, input1.txt and input2.txt

In [6]:
file = "input1.txt" # file subject to analysis

df = lr(file).data # with LogRead class the file is read to a DataFrame from pandas library
df = df.loc[0:int(len(df)/3),:] # in this example, we'll only analyze a portion of the data
print(df)
                 DATE/TIME  MS  EVENT  TEMPERATURE  EXT TEMPERATURE  \
0      27/05/2021 11:10:15   0      0        24.24            23.94   
1      27/05/2021 11:11:15   0      0        24.36            24.06   
2      27/05/2021 11:12:15   0      0        24.42            23.94   
3      27/05/2021 11:13:15   0      0        24.41            23.88   
4      27/05/2021 11:14:15   0      0        24.36            23.81   
...                    ...  ..    ...          ...              ...   
25394  14/06/2021 03:17:44   0      0        32.36            31.38   
25395  14/06/2021 03:18:44   0      0        32.39            31.44   
25396  14/06/2021 03:19:44   0      0        32.40            31.44   
25397  14/06/2021 03:20:44   0      0        32.43            31.44   
25398  14/06/2021 03:21:44   0      0        32.46            31.50   

       ORIENTATION   PIM       PIMn  TAT          TATn  ...          ZCMn  \
0               65  4856   0.000003  278  1.713810e-07  ...  6.349740e-08   
1               68  4483  74.716700  285  4.750000e+00  ...  1.500000e+00   
2               68   425   7.083330   40  6.666670e-01  ...  5.000000e-02   
3               68   873  14.550000   56  9.333330e-01  ...  2.666670e-01   
4               68   413   6.883330   31  5.166670e-01  ...  1.666670e-02   
...            ...   ...        ...  ...           ...  ...           ...   
25394           80     0   0.000000    0  0.000000e+00  ...  0.000000e+00   
25395           80     0   0.000000    0  0.000000e+00  ...  0.000000e+00   
25396           80     0   0.000000    0  0.000000e+00  ...  0.000000e+00   
25397           80     0   0.000000    0  0.000000e+00  ...  0.000000e+00   
25398           80     0   0.000000    0  0.000000e+00  ...  0.000000e+00   

       LIGHT  AMB LIGHT  RED LIGHT  GREEN LIGHT  BLUE LIGHT  IR LIGHT  \
0      26.04      14.19       5.03         6.54        4.55      5.63   
1      12.62       6.95       2.55         3.17        2.06      6.32   
2      41.56      22.73       8.56        10.43        6.20     13.79   
3      43.87      23.95       9.01        11.01        6.50     14.23   
4      34.62      18.88       7.09         8.69        5.11     11.09   
...      ...        ...        ...          ...         ...       ...   
25394   0.00       0.00       0.00         0.00        0.00      0.00   
25395   0.00       0.00       0.00         0.00        0.00      0.00   
25396   0.00       0.00       0.00         0.00        0.00      0.00   
25397   0.00       0.00       0.00         0.00        0.00      0.00   
25398   0.00       0.00       0.00         0.00        0.00      0.00   

       UVA LIGHT  UVB LIGHT  STATE  
0            0.0       0.00      0  
1            0.0       0.00      0  
2            0.0       1.43      0  
3            0.0       1.90      0  
4            0.0       1.43      0  
...          ...        ...    ...  
25394        0.0       0.00      1  
25395        0.0       0.00      1  
25396        0.0       0.00      1  
25397        0.0       0.00      1  
25398        0.0       0.00      1  

[25399 rows x 21 columns]

4) Preparing the input DataFrame

In [7]:
# columns need to renamed before being fed to the algorithms
df = df.rename(columns={"DATE/TIME":"datetime",
                        "PIM":"activity",
                        "TEMPERATURE":"int_temp",
                        "EXT TEMPERATURE":"ext_temp"})

# state-related columns will be separated for better visuallization, all of them will be initially filled with zeros
df["state"] = np.zeros(len(df))
df["offwrist"] = np.zeros(len(df)) 
df["sleep"] = np.zeros(len(df)) 

int_temp = df["int_temp"].to_numpy()
ext_temp = df["ext_temp"].to_numpy()
int_temp = np.where(int_temp > 0, int_temp, 0)
ext_temp = np.where(ext_temp > 0, ext_temp, 0)
int_temp = np.where(int_temp < 42 , int_temp, 42)
ext_temp = np.where(ext_temp < 42 , ext_temp, 42)
df["int_temp"] = int_temp
df["ext_temp"] = ext_temp

# pre-scaled temperatures will be used for plotting
scale = np.max([np.max(ext_temp),np.max(int_temp)])
df["int_temp_"] = int_temp/scale
df["ext_temp_"] = ext_temp/scale

print(df)
                  datetime  MS  EVENT  int_temp  ext_temp  ORIENTATION  \
0      27/05/2021 11:10:15   0      0     24.24     23.94           65   
1      27/05/2021 11:11:15   0      0     24.36     24.06           68   
2      27/05/2021 11:12:15   0      0     24.42     23.94           68   
3      27/05/2021 11:13:15   0      0     24.41     23.88           68   
4      27/05/2021 11:14:15   0      0     24.36     23.81           68   
...                    ...  ..    ...       ...       ...          ...   
25394  14/06/2021 03:17:44   0      0     32.36     31.38           80   
25395  14/06/2021 03:18:44   0      0     32.39     31.44           80   
25396  14/06/2021 03:19:44   0      0     32.40     31.44           80   
25397  14/06/2021 03:20:44   0      0     32.43     31.44           80   
25398  14/06/2021 03:21:44   0      0     32.46     31.50           80   

       activity       PIMn  TAT          TATn  ...  BLUE LIGHT  IR LIGHT  \
0          4856   0.000003  278  1.713810e-07  ...        4.55      5.63   
1          4483  74.716700  285  4.750000e+00  ...        2.06      6.32   
2           425   7.083330   40  6.666670e-01  ...        6.20     13.79   
3           873  14.550000   56  9.333330e-01  ...        6.50     14.23   
4           413   6.883330   31  5.166670e-01  ...        5.11     11.09   
...         ...        ...  ...           ...  ...         ...       ...   
25394         0   0.000000    0  0.000000e+00  ...        0.00      0.00   
25395         0   0.000000    0  0.000000e+00  ...        0.00      0.00   
25396         0   0.000000    0  0.000000e+00  ...        0.00      0.00   
25397         0   0.000000    0  0.000000e+00  ...        0.00      0.00   
25398         0   0.000000    0  0.000000e+00  ...        0.00      0.00   

       UVA LIGHT  UVB LIGHT  STATE  state  offwrist  sleep  int_temp_  \
0            0.0       0.00      0    0.0       0.0    0.0   0.676528   
1            0.0       0.00      0    0.0       0.0    0.0   0.679877   
2            0.0       1.43      0    0.0       0.0    0.0   0.681552   
3            0.0       1.90      0    0.0       0.0    0.0   0.681273   
4            0.0       1.43      0    0.0       0.0    0.0   0.679877   
...          ...        ...    ...    ...       ...    ...        ...   
25394        0.0       0.00      1    0.0       0.0    0.0   0.903154   
25395        0.0       0.00      1    0.0       0.0    0.0   0.903991   
25396        0.0       0.00      1    0.0       0.0    0.0   0.904270   
25397        0.0       0.00      1    0.0       0.0    0.0   0.905107   
25398        0.0       0.00      1    0.0       0.0    0.0   0.905945   

       ext_temp_  
0       0.668155  
1       0.671504  
2       0.668155  
3       0.666481  
4       0.664527  
...          ...  
25394   0.875802  
25395   0.877477  
25396   0.877477  
25397   0.877477  
25398   0.879152  

[25399 rows x 26 columns]

5) Offwrist

The algorithm for offwrist period detection is meant to filter out of the analysis the moments when the subject is not wearing the actigraph. It is based on the Gradient Boosting algorithm provided by the XGBoost library. With the activity measure PIM and internal and external temperature, new auxiliary variables are computed and all are fed to the algorithm to generate a classification.

In [8]:
out = offwrist_wrapper(df) # offwrist detection

# column updates
df["state"] = out
df["offwrist"] = 0.25*out 

# we'll use actograms for visuallizing the data
fig = actigraphy_single_plot_actogram(df, ["activity", "int_temp_","ext_temp_","sleep","offwrist"], [False, True, True, True, True], 12, dt = "datetime")
fig.show()
/home/julius/Área de Trabalho/functions.py:14: RuntimeWarning: invalid value encountered in long_scalars
  return  np.sum(np.where(a <= thresh,1,0))/n

6) Main sleep periods

The algorithm for main sleep period detection is based on an implementation of the Crespo algorithm that was initially described in the scientific literature by Crespo et al in 2012. The algorithm consists on a delimitation of the periods of high and low activity inside the time series through a percentile-based thresholding operation. After this initial delimitation, a refinement procedure takes place using metrics that we've developed.

In [9]:
out = crespo_wrapper(df) # main sleep period detection (bed time and getup time)

df["state"] = out
df["sleep"] = np.where(out == 4,0,out)

fig = actigraphy_single_plot_actogram(df, ["activity", "int_temp_","ext_temp_","sleep","offwrist"], [False, True, True, True, True], 12, dt = "datetime")
fig.show()

7) WASO

The Wakefullness After Sleep Onset detection uses our implementation of an algorithm described in the scientific literature by Cole et al in 1992. It consists on a weighted sum rolling window operation followed by a thresholding operation. In our implementation we use a different window size and weights, we choose to do so based on the results we got from optimization studies carried with AI and other statistical techniques.

In [10]:
onwrist = np.where(out == 4, False, True) # actigraph on the wrist mask

# input variables read only in the offwrist periods
stamps = df["datetime"].to_numpy()[onwrist]
zcm = df["ZCMn"].to_numpy()[onwrist]

n = len(zcm) # time-series length
state = np.zeros(n) # auxiliary array to compute new states 

in_bed = out[onwrist] # this array acts as a sleep journal, we get the information of whether or not the subject is in bed
nights = nights_df(stamps,in_bed,wake_thresh=240,search_gap=False) # night segregation

num_nights = len(nights) # number of nights present in the time series

# nightly sleep statistics arrays
waso = np.nan*np.ones(num_nights)
tbt = np.nan*np.ones(num_nights)
tst = np.nan*np.ones(num_nights)
sol = np.nan*np.ones(num_nights)
soi = np.nan*np.ones(num_nights)
nw = np.nan*np.ones(num_nights)
eff = np.nan*np.ones(num_nights)
bts = []
gts = []

for i in range(num_nights):
    bt = nights.at[i,"bt"] # bed time index
    gt = nights.at[i,"gt"] # getup time index

    bts.append(stamps[bt]) # bed time 
    gts.append(stamps[gt]) # getup time

    input = zcm[bt:gt]
    cole = ck(input, # WASO computations are carried nightly
              P=0.000464,
              weights_before=[34.5,133,529,375,408,400.5,1074,2048.5,2424.5],
              weights_after=[1920,149.5,257.5,125,111.5,120,69,40.5],
              )
    cole.model(np.zeros(gt-bt))

    cpred = cole.filtered_weighted # states on the current night

    if i == 3:
      save = pd.DataFrame([],columns=["stamps","zcm","cole","state"])
      save["zcm"] = input
      save["cole"] = cole.weighted
      save["state"] = cpred
      save["stamps"] = stamps[bt:gt]

      save.to_csv("save.csv",sep=';',header=True,index=False, index_label=None)
    
    # SOL computation
    latency = 0
    while cpred[latency] > 0:
        latency += 1

    # SOI computation
    innertia = len(cpred)-1
    while cpred[innertia] > 0:
        innertia -= 1

    # Computing the number of wake periods during the night
    edges = np.diff(cpred)
    num_awake = np.sum(np.where(edges>0,1,0))

    sol[i] = latency
    soi[i] = len(cpred)-1-innertia
    waso[i] = np.sum(cpred[latency:innertia])
    nw[i] = num_awake
    tbt[i] = gt-bt
    tst[i] = tbt[i]-waso[i]-soi[i]-sol[i]
    eff[i] = tst[i]/tbt[i]
    
    state[bt:gt] = 1-cpred

nights["tbt"] = tbt
nights["waso"] = waso
nights["sol"] = sol
nights["soi"] = soi
nights["tst"] = tst
nights["nw"] = nw
nights["eff"] = eff

nights.insert(0,"gts",gts)
nights.insert(0,"bts",bts)

out[onwrist] = state

df["state"] = out
df["sleep"] = np.where(out == 4,0,out)

print(nights)
                    bts                  gts     bt     gt    tbt   waso  sol  \
0   27/05/2021 23:42:15  28/05/2021 06:54:15    752   1184  432.0   17.0  8.0   
1   28/05/2021 23:30:15  29/05/2021 06:56:15   2180   2626  446.0   20.0  8.0   
2   30/05/2021 22:18:15  31/05/2021 07:55:15   4729   5306  577.0   28.0  0.0   
3   31/05/2021 23:24:15  01/06/2021 07:22:15   6202   6680  478.0   19.0  0.0   
4   02/06/2021 00:09:15  02/06/2021 07:21:15   7687   8119  432.0   23.0  0.0   
5   02/06/2021 23:36:15  03/06/2021 07:47:15   8805   9296  491.0    8.0  0.0   
6   04/06/2021 00:00:54  04/06/2021 07:43:54  10234  10697  463.0   10.0  0.0   
7   04/06/2021 23:36:54  06/06/2021 06:20:54  11650  12535  885.0  133.0  8.0   
8   07/06/2021 00:13:54  07/06/2021 06:59:54  13608  14014  406.0    6.0  0.0   
9   07/06/2021 23:53:54  08/06/2021 08:54:54  15028  15569  541.0   12.0  0.0   
10  08/06/2021 23:25:54  09/06/2021 07:19:54  16440  16914  474.0   28.0  0.0   
11  10/06/2021 00:27:54  10/06/2021 11:07:54  17655  18295  640.0   65.0  0.0   
12  11/06/2021 00:11:44  11/06/2021 06:34:44  19061  19444  383.0    4.0  0.0   
13  12/06/2021 00:06:44  12/06/2021 08:33:44  20415  20922  507.0   29.0  0.0   
14  12/06/2021 23:39:44  13/06/2021 08:09:44  21828  22338  510.0   22.0  0.0   
15  13/06/2021 23:53:44  14/06/2021 03:18:44  23282  23487  205.0    0.0  0.0   

    soi    tst    nw       eff  
0   1.0  406.0   8.0  0.939815  
1   0.0  418.0   5.0  0.937220  
2   0.0  549.0   8.0  0.951473  
3   0.0  459.0   6.0  0.960251  
4   0.0  409.0   6.0  0.946759  
5   1.0  482.0   2.0  0.981670  
6   3.0  450.0   4.0  0.971922  
7   0.0  744.0  10.0  0.840678  
8   0.0  400.0   2.0  0.985222  
9   0.0  529.0   4.0  0.977819  
10  0.0  446.0   5.0  0.940928  
11  0.0  575.0  14.0  0.898438  
12  1.0  378.0   2.0  0.986945  
13  0.0  478.0   4.0  0.942801  
14  2.0  486.0   6.0  0.952941  
15  0.0  205.0   0.0  1.000000  
In [11]:
fig = actigraphy_single_plot_actogram(df, ["activity", "int_temp_","ext_temp_","sleep","offwrist"], [False, True, True, True, True], 12, dt = "datetime")
fig.show()